aggr <- SubsetData(aggr, subset.name = "nUMI", accept.high = 50000)
aggr <- SubsetData(aggr, subset.name = "percent.mito", accept.high = 0.1)
#Regress of unwanted variation
aggr <- RegressOut(aggr, latent.vars = c("nUMI"))
#Show higly variable genes
aggr <- MeanVarPlot(aggr ,fxn.x = expMean, fxn.y = logVarDivMean, x.low.cutoff = 0.0125, x.high.cutoff = 3,
y.cutoff = 0.5, do.contour = F, cex.text.use=0.7)
length(aggr@var.genes)
aggr <- PCA(aggr, pc.genes = aggr@var.genes, do.print = TRUE, pcs.store=100)
aggr <- ProjectPCA(aggr)
names<-sham@data@Dimnames[1]
names<-unlist(names)
write.table(names, file = "~/Dropbox/data/cardiac_stem_cells/single_cell_Feb2017/ShamVsMIaggr/clustersPC42Res06/MIvsSham_expressed_genes.txt",
row.names = FALSE, )
names<-aggr@data@Dimnames[1]
names<-unlist(names)
write.table(names, file = "~/Dropbox/data/cardiac_stem_cells/single_cell_Feb2017/ShamVsMIaggr/clustersPC42Res06/MIvsSham_expressed_genes.txt",
row.names = FALSE, quote=FALSE, col.names=FALSE)
aggr <- FindClusters(aggr, pc.use = 1:42, resolution = 0.6, print.output = 0, save.SNN = T)
### t-SNE analysis
aggr <- RunTSNE(aggr, dims.use = 1:42, do.fast = T)
TSNEPlot(aggr)
aggr.names <- names(aggr@ident)
aggr.tsne1 <- aggr@tsne.rot$tSNE_1
names(aggr.tsne1) <- names(aggr@ident)
aggr.tsne2 <- aggr@tsne.rot$tSNE_2
names(aggr.tsne2) <- names(aggr@ident)
cellSample <- sub(".*-(.*)","\\1", aggr.names)
cellSample <- replace(cellSample, cellSample=="1", "Sham")
cellSample <- replace(cellSample, cellSample=="2", "MI")
ggData <- data.frame(tSNE_1=aggr.tsne1, tSNE_2=aggr.tsne2, cluster=aggr@ident, experiment=cellSample)
ggData$experiment <- factor(ggData$experiment, levels = c("Sham", "MI"))
ggplot(ggData, aes(tSNE_1, tSNE_2, color=cluster)) + geom_point(size=1.0) + xlab("tSNE 1") + ylab("tSNE 2") +
facet_wrap(~experiment) + ggtitle("Cell clusterings: Sham vs MI")
library(ggplot2)
library(Seurat)
library(dplyr)
library(Matrix)
# Read in data
day3.data <- Read10X("~/Dropbox/data/cardiac_stem_cells/single_cell_Feb2017/ShamVsMIaggr/gfpouts/filtered_gene_bc_matrices/redata-cellranger-gfp-mm10")
day7.data <- Read10X("~/Dropbox/data/cardiac_stem_cells/single_cell_June2017/GFP_ShamVsMI_Day7/filtered_gene_bc_matrices/redata-cellranger-gfp-mm10")
# Append cell names so they are unique
colnames(day3.data) = paste0(colnames(day3.data), "-3")
colnames(day7.data) = paste0(colnames(day7.data), "-7")
# Create Seurat object for day 3 data
day3 <- CreateSeuratObject(raw.data = day3.data, min.cells = 10, min.genes = 200, project = "Day3")
mito.genes <- grep("^mt-", rownames(day3@data), value = TRUE)
percent.mito <- Matrix::colSums(day3@raw.data[mito.genes, ])/Matrix::colSums(day3@raw.data)
day3 <- AddMetaData(day3, percent.mito, "percent.mito")
day3.data <- Read10X("~/Dropbox/data/cardiac_stem_cells/single_cell_Feb2017/ShamGFP/filtered_gene_bc_matrices/redata-cellranger-gfp-mm10")
day3.data <- Read10X("~/Dropbox/data/cardiac_stem_cells/single_cell_Feb2017/ShamVsMIaggr/gfpouts/filtered_gene_bc_matrices/redata-cellranger-gfp-mm10")
day3.data <- Read10X("~/Dropbox/data/cardiac_stem_cells/single_cell_Feb2017/ShamVsMIaggr/gfpouts/filtered_gene_bc_matrices_mex/redata-cellranger-gfp-mm10")
day7.data <- Read10X("~/Dropbox/data/cardiac_stem_cells/single_cell_June2017/GFP_ShamVsMI_Day7/filtered_gene_bc_matrices/redata-cellranger-gfp-mm10")
day7.data <- Read10X("/home/ralph/Dropbox/data/cardiac_stem_cells/single_cell_June2017/GFP_ShamVsMI_Day7/filtered_gene_bc_matrices_mex/redata-cellranger-gfp-mm10")
colnames(day3.data) = paste0(colnames(day3.data), "-3")
colnames(day7.data) = paste0(colnames(day7.data), "-7")
day3 <- CreateSeuratObject(raw.data = day3.data, min.cells = 10, min.genes = 200, project = "Day3")
mito.genes <- grep("^mt-", rownames(day3@data), value = TRUE)
percent.mito <- Matrix::colSums(day3@raw.data[mito.genes, ])/Matrix::colSums(day3@raw.data)
day3 <- AddMetaData(day3, percent.mito, "percent.mito")
day3 <- FilterCells(object = day3, subset.names = c("nGene", "nUMI", "percent.mito"),
low.thresholds = c(200, 500, -Inf), high.thresholds = c(6000, 50000, 0.05))
day3 <- NormalizeData(object = day3)
day3 <- ScaleData(object = day3)
day3 <- FindVariableGenes(object = day3, do.plot = FALSE)
# Create Seurat object for day 7 data
day7 <- CreateSeuratObject(raw.data = day7.data, min.cells = 10, min.genes = 200, project = "Day7")
mito.genes <- grep("^mt-", rownames(day7@data), value = TRUE)
percent.mito <- Matrix::colSums(day7@raw.data[mito.genes, ])/Matrix::colSums(day7@raw.data)
day7 <- AddMetaData(day7, percent.mito, "percent.mito")
day7 <- FilterCells(object = day7, subset.names = c("nGene", "nUMI", "percent.mito"),
low.thresholds = c(200, 1000, -Inf), high.thresholds = c(5000, 25000, 0.05))
day7 <- NormalizeData(object = day7)
day7 <- ScaleData(object = day7)
day7 <- FindVariableGenes(object = day7, do.plot = FALSE)
#get union of highly variable genes
hvg.day3 <- rownames(x = head(x = day3@hvg.info, n = 2000))
hvg.day7 <- rownames(x = head(x = day7@hvg.info, n = 2000))
hvg.union <- union(x = hvg.day3, y = hvg.day7)
# Set protocol
day3@meta.data[, "Day"] <- "3"
day7@meta.data[, "Day"] <- "7"
aggr <- RunCCA(object = day3, object2 = day7, genes.use = hvg.union)
p1 <- DimPlot(object = aggr, reduction.use = "cca", group.by = "Day", pt.size = 0.5,
do.return = TRUE)
p2 <- VlnPlot(object = aggr, features.plot = "CC1", group.by = "Day", do.return = TRUE)
plot_grid(p1, p2)
DimHeatmap(object = aggr, reduction.type = "cca", cells.use = 500, dim.use = 1:9,
do.balanced = TRUE)
DimHeatmap(object = aggr, reduction.type = "cca", cells.use = 500, dim.use = 10:18,
do.balanced = TRUE)
DimHeatmap(object = aggr, reduction.type = "cca", cells.use = 500, dim.use = 19:27,
do.balanced = TRUE)
aggr <- CalcVarExpRatio(object = aggr, reduction.type = "pca", grouping.var = "Day",
dims.use = 1:13)
aggr.all.save <- aggr
aggr <- SubsetData(object = aggr, subset.name = "var.ratio.pca", accept.low = 0.5)
aggr.discard <- SubsetData(object = sham.all.save, subset.name = "var.ratio.pca",
accept.high = 0.5)
median(x = aggr@meta.data[, "nGene"])
median(x = aggr.discard@meta.data[, "nGene"])
aggr.discard <- SubsetData(object = aggr.all.save, subset.name = "var.ratio.pca",
accept.high = 0.5)
median(x = aggr@meta.data[, "nGene"])
median(x = aggr.discard@meta.data[, "nGene"])
aggr <- AlignSubspace(object = aggr, reduction.type = "cca", grouping.var = "Day",
dims.align = 1:18)
p1 <- VlnPlot(object = aggr, features.plot = "ACC1", group.by = "Day",
do.return = TRUE)
p2 <- VlnPlot(object = aggr, features.plot = "ACC2", group.by = "Day",
do.return = TRUE)
plot_grid(p1, p2)
aggr <- FindClusters(object = aggr, resolution=c(0.6, 0.8, 1.0, 1.2), reduction.type = "cca.aligned", dims.use = 1:18,
print.output = 0, save.SNN = TRUE)
aggr <- RunTSNE(object = aggr, reduction.use = "cca.aligned", dims.use = 1:18,
do.fast = TRUE, seed.use=2)
TSNEPlot(sham)
TSNEPlot(aggr)
p1 <- TSNEPlot(object = aggr, group.by = "Day", do.return = TRUE, pt.size = 0.5)
p2 <- TSNEPlot(object = aggr, do.return = TRUE, pt.size = 0.5)
plot_grid(p1, p2)
aggr.names <- names(aggr@ident)
aggr.tsne1 <- aggr@dr$tsne@cell.embeddings[, 1]
names(aggr.tsne1) <- names(aggr@ident)
aggr.tsne2 <- aggr@dr$tsne@cell.embeddings[, 2]
names(aggr.tsne2) <- names(aggr@ident)
cellSample <- sub(".*-(.*)","\\1", aggr.names)
cellSample <- replace(cellSample, cellSample=="3", "Day 3")
cellSample <- replace(cellSample, cellSample=="7", "Day 7")
col.set <- scales::hue_pal()(9)
ggData <- data.frame(tSNE_1=aggr.tsne1, tSNE_2=aggr.tsne2, cluster=aggr@ident, experiment=cellSample)
ggplot(ggData, aes(tSNE_1, tSNE_2, color=cluster)) + geom_point(size=1.0) + xlab("tSNE 1") + ylab("tSNE 2") +
ggtitle("Sham GFP+ cell clusterings") + scale_color_manual(values=col.set, breaks=as.character(c(0:8)),
labels=as.character(c(1:9)), name="Cluster") + guides(color = guide_legend(override.aes = list(size=4))) +
facet_wrap(~experiment)
col.set <- scales::hue_pal()(16)
ggData <- data.frame(tSNE_1=aggr.tsne1, tSNE_2=aggr.tsne2, cluster=aggr@ident, experiment=cellSample)
ggplot(ggData, aes(tSNE_1, tSNE_2, color=cluster)) + geom_point(size=1.0) + xlab("tSNE 1") + ylab("tSNE 2") +
ggtitle("Sham GFP+ cell clusterings") + scale_color_manual(values=col.set, breaks=as.character(c(0:15)),
labels=as.character(c(1:16)), name="Cluster") + guides(color = guide_legend(override.aes = list(size=4))) +
facet_wrap(~experiment)
thisGene = "Wif1"
expression <- sham@data[thisGene, ]
ggData <- data.frame(tSNE_1=sham.tsne1, tSNE_2=sham.tsne2, cluster=sham@ident, experiment=cellSample, Expression=expression)
ggData$experiment <- factor(ggData$experiment, levels = c("Day 3", "Day 7"))
ggplot(ggData, aes(tSNE_1, tSNE_2, color=expression)) + geom_point(size=1.0) + xlab("tSNE 1") + ylab("tSNE 2") +
ggtitle(paste0(thisGene, " expression: Day 3 vs Day7")) +
scale_color_gradient2(low="#d9d9d9", mid="red", high="brown", midpoint=(max(expression)-min(expression))/2, name="")
expression <- aggr@data[thisGene, ]
thisGene = "Wif1"
expression <- aggr@data[thisGene, ]
ggData <- data.frame(tSNE_1=aggr.tsne1, tSNE_2=aggr.tsne2, cluster=aggr@ident, experiment=cellSample, Expression=expression)
ggData$experiment <- factor(ggData$experiment, levels = c("Day 3", "Day 7"))
ggplot(ggData, aes(tSNE_1, tSNE_2, color=expression)) + geom_point(size=1.0) + xlab("tSNE 1") + ylab("tSNE 2") +
ggtitle(paste0(thisGene, " expression: Day 3 vs Day7")) +
scale_color_gradient2(low="#d9d9d9", mid="red", high="brown", midpoint=(max(expression)-min(expression))/2, name="")
thisGene = "Cdc20"
expression <- aggr@data[thisGene, ]
ggData <- data.frame(tSNE_1=aggr.tsne1, tSNE_2=aggr.tsne2, cluster=aggr@ident, experiment=cellSample, Expression=expression)
ggData$experiment <- factor(ggData$experiment, levels = c("Day 3", "Day 7"))
ggplot(ggData, aes(tSNE_1, tSNE_2, color=expression)) + geom_point(size=1.0) + xlab("tSNE 1") + ylab("tSNE 2") +
ggtitle(paste0(thisGene, " expression: Day 3 vs Day7")) +
scale_color_gradient2(low="#d9d9d9", mid="red", high="brown", midpoint=(max(expression)-min(expression))/2, name="")
thisGene = "Postn"
expression <- aggr@data[thisGene, ]
ggData <- data.frame(tSNE_1=aggr.tsne1, tSNE_2=aggr.tsne2, cluster=aggr@ident, experiment=cellSample, Expression=expression)
ggData$experiment <- factor(ggData$experiment, levels = c("Day 3", "Day 7"))
ggplot(ggData, aes(tSNE_1, tSNE_2, color=expression)) + geom_point(size=1.0) + xlab("tSNE 1") + ylab("tSNE 2") +
ggtitle(paste0(thisGene, " expression: Day 3 vs Day7")) +
scale_color_gradient2(low="#d9d9d9", mid="red", high="brown", midpoint=(max(expression)-min(expression))/2, name="")
col.set <- scales::hue_pal()(16)
ggData <- data.frame(tSNE_1=aggr.tsne1, tSNE_2=aggr.tsne2, cluster=aggr@ident, experiment=cellSample)
ggplot(ggData, aes(tSNE_1, tSNE_2, color=cluster)) + geom_point(size=1.0) + xlab("tSNE 1") + ylab("tSNE 2") +
ggtitle("Sham GFP+ cell clusterings") + scale_color_manual(values=col.set, breaks=as.character(c(0:15)),
labels=as.character(c(1:16)), name="Cluster") + guides(color = guide_legend(override.aes = list(size=4))) +
facet_wrap(~experiment)
thisGene = "Ly6a"
expression <- aggr@data[thisGene, ]
ggData <- data.frame(tSNE_1=aggr.tsne1, tSNE_2=aggr.tsne2, cluster=aggr@ident, experiment=cellSample, Expression=expression)
ggData$experiment <- factor(ggData$experiment, levels = c("Day 3", "Day 7"))
ggplot(ggData, aes(tSNE_1, tSNE_2, color=expression)) + geom_point(size=1.0) + xlab("tSNE 1") + ylab("tSNE 2") +
ggtitle(paste0(thisGene, " expression: Day 3 vs Day7")) +
scale_color_gradient2(low="#d9d9d9", mid="red", high="brown", midpoint=(max(expression)-min(expression))/2, name="")
save(aggr, day3, day7, file="Day3_Day7_alignment.RData")
library(edgeR)
library(ggplot2)
library(DESeq2)
library(RUVSeq)
sessionInfo()
library(DESeq2)
sessionInfo()
?read.table
?read.csv
library(Matrix)
?writeMM
?make.unique
apa.data <- as.matrix(read.table("~/Dropbox/SingleCell_PolyA/Data/9k_neurons/counts_by_polyA_v2.tab",
header = TRUE, row.names = 1), "sparseMatrix")
apa.data <- as.matrix(read.table("~/Dropbox/SingleCell_PolyA/Data/9k_neurons/counts_by_polyA_v2.tab",
header = TRUE), "sparseMatrix")
rownames(apa.data) = make.unique(apa.data[, 1], sep="_")
apa.data = apa.data[, -1]
apa.data = as.matrix(apa.data, "spareMatrix")
apa.data[1, 1:10]
apa.data = as.matrix(as.numeric(apa.data), "spareMatrix")
apa.data[1, 1:10]
class(apa.data)
dim(apa.data)
?read.table
setwd("~/Dropbox/New format of the single cell paper 2018/eLife revision/eLife Supplementary files/Source code file 1 - R code/Clustering")
load("~/Dropbox/data/cardiac_stem_cells/single_cell_October2017/Seurat_TIP_D7_MID3_54PC.RData")
## First load the relevant packages we'll be using.
library(ggplot2)
library(Seurat)
library(plyr)
library(dplyr)
library(Matrix)
library(cowplot)
tip.aggr <- SetAllIdent(tip.aggr, id="res.1.2")
tip.aggr = aggr
remove(aggr)
new.labels <- c("M1M\u03A6", "EC1", "F-SL", "F-Act", "F-SH", "BC", "M2M\u03A6", "M1Mo", "MYO",
"EC3", "DC", "EC2", "TC1-Cd8", "TC2-Cd4", "MAC-TR", "Mural", "M2M\u03A6-EC", "Cyc",
"MAC6", "MAC-IFNIC", "MAC7", "MAC8", "F-EC", "F-WntX", "EC-L1", "NKC", "EC-L2", "BC-TC", "Glial")
tip.aggr@ident = plyr::mapvalues(x = tip.aggr@ident, from = names(table(tip.aggr@ident)), to = p.labels)
tip.aggr@ident = plyr::mapvalues(x = tip.aggr@ident, from = names(table(tip.aggr@ident)), to = new.labels)
length(table(tip.aggr@ident))
tip.aggr <- SetAllIdent(tip.aggr, id="res.1.2")
new.labels <- c("M1M\u03A6", "EC1", "F-SL", "F-Act", "F-SH", "BC", "M2M\u03A6", "M1Mo", "MYO",
"EC3", "DC", "EC2", "TC1-Cd8", "TC2-Cd4", "MAC-TR", "Mural", "M2M\u03A6-EC", "Cyc",
"MAC6", "MAC-IFNIC", "MAC7", "MAC8", "F-EC", "F-WntX", "EC-L1", "NKC", "EC-L2", "BC-TC", "Glial")
tip.aggr@ident = plyr::mapvalues(x = tip.aggr@ident, from = names(table(tip.aggr@ident)), to = new.labels)
col.set <- c("#c10023", "#008e17", "#fb8500", "#f60000", "#fde800", "#bc9000","#4ffc00", "#00bcac", "#0099cc",
"#D35400", "#00eefd", "#5f777f", "#cf6bd6", "#99cc00", "#aa00ff", "#ff00ff", "#ffb600", "#0053c8",
"#f2a287","#ffb3ff", "#800000", "#77a7b7", "#630099", "#00896e", "#ffba4f", "#00cc99", "#a81c0d", "#00ffae", "#FE0092")
TSNEPlot(tip.aggr, do.label = TRUE, pt.size = 0.5, colors.use = col.set)
hybrid.cells = names(aggr@ident)[as.character(aggr@ident) %in% c("M2M\u03A6-EC", "F-EC", "EC-L1", "EC-L2", "BC-TC")]
length(hybrid.cells)
hybrid.cells = names(tip.aggr@ident)[as.character(tip.aggr@ident) %in% c("M2M\u03A6-EC", "F-EC", "EC-L1", "EC-L2", "BC-TC")]
length(hybrid.cells)
cells.use = setdiff(tip.aggr@cell.names, hybrid.cells)
tip.aggr = FilterCells(tip.aggr, subset.names = NULL, cells.use = cells.use)
col.set.update <- c("#fb8500", "#ff0000", "#008e17", "#fde800", "#4ffc00", "#bc9000", "#0099cc", "#00bcac",
"#00eefd", "#5f777f", "#cf6bd6", "#D35400", "#99cc00","#aa00ff", "#ff00ff", "#0053c8",
"#f2a287", "#800000", "#77a7b7", "#00896e", "#ffb3ff", "#00cc99", "#FE0092")
TSNEPlot(tip.aggr, do.label = TRUE, colors.use = col.set.update)
col.set.update <- c("#c10023", "#008e17", "#fb8500", "#f60000", "#fde800", "#bc9000","#4ffc00", "#00bcac", "#0099cc",
"#D35400", "#00eefd", "#5f777f", "#cf6bd6", "#99cc00", "#aa00ff", "#ff00ff", "#0053c8",
"#f2a287","#ffb3ff", "#800000", "#77a7b7", "#00896e", "#00cc99", "#FE0092")
TSNEPlot(tip.aggr, do.label = TRUE, colors.use = col.set.update)
hybrid.cl = c("M2M\u03A6-EC", "F-EC", "EC-L1", "EC-L2", "BC-TC")
cl.keep = setdiff(new.labels, hybrid.cl)
tip.aggr@ident = factor(tip.aggr@ident, levels = cl.keep)
TSNEPlot(tip.aggr, do.label = TRUE, colors.use = col.set.update)
TSNEPlot(tip.aggr, do.label = TRUE, colors.use = col.set.update, pt.size = 0.75)
setwd("~/Dropbox/New format of the single cell paper 2018/eLife revision/eLife Supplementary files/Source code file 1 - R code/CellCommunication")
library(plyr)
library(network)
library(tidygraph)
library(igraph)
library(ggraph)
library(scales)
library(STRINGdb)
library(Seurat)
library(progress)
source("ligand_receptor_functions.R")
col.set <- c("#c10023", "#008e17", "#fb8500", "#f60000", "#fde800", "#bc9000","#4ffc00", "#00bcac", "#0099cc",
"#D35400", "#00eefd", "#5f777f", "#cf6bd6", "#99cc00", "#aa00ff", "#ff00ff", "#0053c8",
"#f2a287","#ffb3ff", "#800000", "#77a7b7", "#00896e", "#00cc99", "#FE0092")
TSNEPlot(tip.aggr, colors.use = col.set, pt.size = 0.5, do.label = TRUE)
mappings = read.csv("ensembl_mouse_human_orthologues.txt", header=TRUE)
mappings = mappings[mappings[, 4]!='' & mappings[, 5]!='', ]
mappings = mappings[, c(4,5)]
mappings = unique(mappings)
human.names = mappings[1, ]
ligand.receptor.pairs = read.csv("All.Pairs-Table 1.csv", header=TRUE, row.names=1)
ligand.receptor.pairs = ligand.receptor.pairs[ligand.receptor.pairs[ ,"Pair.Evidence"] %in% c("literature supported", "putative"), ]
receptors = as.character(ligand.receptor.pairs[, 3])
ligands = as.character(ligand.receptor.pairs[, 1])
all.genes = c(receptors, ligands)
ligand.receptor.mappings = mappings[mappings[, 1] %in% all.genes, ]
mouse.names = rownames(tip.aggr@data)
ligand.receptor.mappings = ligand.receptor.mappings[ligand.receptor.mappings[, 2] %in% mouse.names, ]
counts = table(ligand.receptor.mappings[, 1])
counts = counts[counts==1]
ligand.receptor.mappings = ligand.receptor.mappings[ligand.receptor.mappings[, 1] %in% names(counts), ]
rownames(ligand.receptor.mappings) = ligand.receptor.mappings[ ,1]
ligand.receptor.pairs = ligand.receptor.pairs[, c(1, 3)]
ligand.receptor.pairs.expressed = ligand.receptor.pairs[ligand.receptor.pairs[, 1] %in% rownames(ligand.receptor.mappings)
& ligand.receptor.pairs[, 2] %in% rownames(ligand.receptor.mappings), ]
clusters.use = names(table(tip.aggr@ident))
populations.use = names(table(tip.aggr@ident))
unique.genes = unique(c(as.character(ligand.receptor.pairs.expressed[,1]), as.character(ligand.receptor.pairs.expressed[, 2])))
unique.mouse.genes = unlist(lapply(unique.genes, function(x) as.character(ligand.receptor.mappings[x, 2])))
de.results = calculateClusterSpecificExpression(unique.mouse.genes, tip.aggr, threshold=0.1, cluster.set = clusters.use)
all.de.results = getGenePairExpressionValues(ligand.receptor.pairs.expressed, mapping.table = ligand.receptor.mappings,
cluster.names = clusters.use, gene.de.results = de.results)
dim(all.de.results)
indicies = all.de.results$Cluster1 %in% clusters.use & all.de.results$Cluster2 %in% clusters.use
ligand.receptor.edges = unique(all.de.results[indicies , c("Gene1", "Gene2")])
colnames(ligand.receptor.edges) = c("source", "target")
pair.identifiers = as.character(apply(ligand.receptor.edges, 1, function(x) paste0(x[1], ".", x[2])))
rownames(ligand.receptor.edges) = pair.identifiers
ligands = as.character(ligand.receptor.edges[, 1])
receptors = as.character(ligand.receptor.edges[, 2])
lr_score_table = get_STRING_table(ligands, receptors)
head(lr_score_table) ## print out some interactions
overlapping.genes = intersect(pair.identifiers, rownames(lr_score_table))
print(paste0(length(overlapping.genes), " overlaps between ligand-receptor map and STRINGdb"))
weights = lr_score_table[overlapping.genes, ]$Combined_score
weights = weights/1000
ligand.receptor.edges.overlap = ligand.receptor.edges[overlapping.genes, ]
ligand.receptor.edges.overlap$weight = weights
ligand.receptor.edges.overlap$relationship = "ligand.receptor"
ligands = as.character(ligand.receptor.edges[, 1])
receptors = as.character(ligand.receptor.edges[, 2])
cluster.ligand.edges = unique(all.de.results[indicies , c("Cluster1", "Gene1", "Gene1.Log2_fold_change")])
colnames(cluster.ligand.edges) = c("source", "target", "weight")
cluster.ligand.edges$relationship = "cluster.ligand"
cluster.ligand.edges$source = paste0("S:", cluster.ligand.edges$source)
## Mapping 'target' population to corresponding receptor
receptor.cluster.edges = unique(all.de.results[indicies , c("Gene2", "Cluster2", "Gene2.Log2_fold_change")])
colnames(receptor.cluster.edges) = c("source", "target", "weight")
receptor.cluster.edges$relationship = "receptor.cluster"
receptor.cluster.edges$target = paste0("T:", receptor.cluster.edges$target)
all.edges = rbind(as.matrix(ligand.receptor.edges.overlap), trimws(as.matrix(receptor.cluster.edges)), trimws(as.matrix(cluster.ligand.edges)))
write.csv(all.edges, file = "TIP_all_ligand_receptor_network_edges.csv", row.names = FALSE)
expression.edges = rbind(trimws(as.matrix(receptor.cluster.edges)), trimws(as.matrix(cluster.ligand.edges)))
write.csv(expression.edges, file = "TIP_ligand_receptor_weights.csv",  row.names = FALSE)
edge.score.file = "TIP_all_ligand_receptor_network_edges.csv"
score.table = read.csv(edge.score.file)
all.weights = score.table$weight
all.edges = score.table[, c("source", "target")]
populations.test = names(table(tip.aggr@ident))
complete.path.table = c()
for (s.pop in populations.test) {
for (t.pop in populations.test) {
source.population = paste0("S:", s.pop)
target.population = paste0("T:", t.pop)
this.path.table = getWeightedPaths(score.table, source.population = source.population,
target.population = target.population, min.weight = 1.5)
complete.path.table = rbind(complete.path.table, this.path.table)
}
}
dim(background.path.table)
dim(complete.path.table)
write.csv(complete.path.table, file = "TIP_network_paths_weight1.5.csv")
background.path.table = c()
for (s.pop in populations.test) {
for (t.pop in populations.test) {
source.population = paste0("S:", s.pop)
target.population = paste0("T:", t.pop)
this.path.table = getWeightedPaths(score.table, source.population = source.population,
target.population = target.population, min.weight = -100)
background.path.table = rbind(background.path.table, this.path.table)
}
}
dim(background.path.table)
write.csv(background.path.table, file = "TIP_background_paths.csv")
weights.file = "TIP_all_ligand_receptor_network_edges.csv"
weights.table= read.csv(weights.file)
ligand.receptor.table = weights.table[weights.table$relationship == "ligand.receptor", ]
rownames(ligand.receptor.table) = paste0(ligand.receptor.table$source, "_", ligand.receptor.table$target)
receptor.cluster.table = weights.table[weights.table$relationship == "receptor.cluster", ]
rownames(receptor.cluster.table) = paste0(receptor.cluster.table$source, "_", receptor.cluster.table$target)
cluster.ligand.table = weights.table[weights.table$relationship == "cluster.ligand", ]
rownames(cluster.ligand.table) = paste0(cluster.ligand.table$source, "_", cluster.ligand.table$target)
## Read in the background network file
completeFile = "TIP_background_paths.csv"
background.table = read.csv(completeFile, row.names = 1)
dim(background.table)
## Read in the filtered network file
thisFile = "TIP_network_paths_weight1.5.csv"
complete.path.table = read.csv(thisFile, row.names = 1)
clusters.use = names(table(tip.aggr@ident))
populations.test = names(table(tip.aggr@ident))
## Permutation testing for determing signifcant cell-cell connections
## Iterate through each combination of populations and do random selections
## of fold changes for ligands and receptors. Calculate empirical P-value.
pvalue.table = c()
for(s.pop in populations.test) {
this.source = paste0("S:", s.pop)
for (t.pop in populations.test) {
this.target = paste0("T:", t.pop)
## Add weights together
s.indicies = which(complete.path.table$Source == this.source)
t.indicies = which(complete.path.table$Target == this.target)
sub.table = complete.path.table[intersect(s.indicies, t.indicies), ]
num.paths = nrow(sub.table)
path.sum = sum(sub.table$Weight)
## Get number of total paths from background table
s.indicies = which(background.table$Source == this.source)
t.indicies = which(background.table$Target == this.target)
sub.table = background.table[intersect(s.indicies, t.indicies), ]
num_total = nrow(sub.table)
## Get weights from the background table
s.indicies.background = which(background.table$Source == this.source)
t.indicies.background = which(background.table$Target == this.target)
combined.indicies = intersect(s.indicies.background, t.indicies.background)
sub.background.table = background.table[combined.indicies, ]
ligand.receptor.set = paste0(sub.background.table$Ligand, "_", sub.background.table$Receptor)
## Now get the real weights of ligand-receptor connections
ligand.receptor.sub.table = ligand.receptor.table[ligand.receptor.set, ]
ppi.weights = ligand.receptor.sub.table$weight
random.paths = rep(NA, num_total)
random.weight.sums = rep(NA, num_total)
for (i in 1:100000) {
random.weights = getRandomFCWeights(ppi.weights, cluster.ligand.table, receptor.cluster.table)
random.weights = random.weights[random.weights >= 1.5]
this.random.weight.sum = sum(random.weights)
this.random.path.count = length(random.weights)
random.paths[i] = this.random.path.count
random.weight.sums[i] = this.random.weight.sum
}
print(paste0("Source ", s.pop, " to ", t.pop, ": "))
p.paths = sum(random.paths >= num.paths)/length(random.paths)
p.sum = sum(random.weight.sums >= path.sum)/length(random.weight.sums)
print(paste0("Num paths P-value = ", p.paths))
print(paste0("Path weights P-value = ", p.sum))
print("------------------------------------")
thisLine = data.frame(Source_population = s.pop, Target_population = t.pop, Num_paths = num.paths,
Num_paths_pvalue = p.paths, Sum_path = path.sum, Sum_path_pvalue = p.sum)
pvalue.table = rbind(pvalue.table, thisLine)
}
}
dim(aggr@data)
dim(tip.aggr@data)
pval.adj = p.adjust(pvalue.table$Sum_path_pvalue, method = "BH")
pvalue.table$Sum_path_padj = pval.adj
out.file = "Permutation_tests_TIP_network.csv"
write.csv(pvalue.table, file = out.file, row.names = FALSE)
path.sig.file = "Permutation_tests_TIP_network.csv"
sig.paths = read.csv(path.sig.file, sep = ",")
sig.paths$Source_population = sub(".:(.*)", "\\1", sig.paths$Source_population)
sig.paths$Target_population = sub(".:(.*)", "\\1", sig.paths$Target_population)
sig.paths = sig.paths[which(sig.paths$Sum_path_padj < 0.01), ]
nrow(sig.paths)
rownames(sig.paths) = paste0(sig.paths$Source_population, "_", sig.paths$Target_population)
collapsed.path.table = sig.paths[, c("Source_population", "Target_population", "Sum_path")]
colnames(collapsed.path.table) = c("Source", "Target", "Weight")
col.set <- c("#c10023", "#008e17", "#fb8500", "#f60000", "#fde800", "#bc9000","#4ffc00", "#00bcac", "#0099cc",
"#D35400", "#00eefd", "#5f777f", "#cf6bd6", "#99cc00", "#aa00ff", "#ff00ff", "#0053c8",
"#f2a287","#ffb3ff", "#800000", "#77a7b7", "#00896e", "#00cc99", "#FE0092")
names(col.set) =  c("M1M\u03A6", "EC1", "F-SL", "F-Act", "F-SH", "BC", "M2M\u03A6", "M1Mo", "MYO",
"EC3", "DC", "EC2", "TC1-Cd8", "TC2-Cd4", "MAC-TR", "Mural", "Cyc",
"MAC6", "MAC-IFNIC", "MAC7", "MAC8", "F-WntX", "NKC", "Glial")
all.weights = collapsed.path.table$Weight
all.edges = collapsed.path.table[, c("Source", "Target")]
edge.sources = all.edges$Source
edges.list = unlist(lapply(t(all.edges), function(x) c(x[1])))
lr.plot <- make_graph(edges.list, directed = TRUE)
E(lr.plot)$weight <- all.weights
populations.use = names(V(lr.plot))
col.set = col.set[populations.use]
cluster.colors = data.frame(cluster=populations.use, color=col.set)
rownames(cluster.colors) = cluster.colors$cluster
colfunc <- colorRampPalette(c("grey", "black"))
vertex.colors = c()
for (this.vertex in V(lr.plot)$name) {
if (this.vertex %in% cluster.colors$cluster) {
this.col = as.character(cluster.colors[cluster.colors$cluster==this.vertex, ]$color)
vertex.colors = append(vertex.colors, this.col)
} else{
vertex.colors = append(vertex.colors, "#e6e6e6")
}
}
edge.colours = c()
for (this.source in edge.sources) {
if (this.source %in% cluster.colors$cluster) {
this.col = as.character(cluster.colors[cluster.colors$cluster==this.source, ]$color)
edge.colours = append(edge.colours, this.col)
} else{
edge.colours = append(edge.colours, "#e6e6e6")
}
}
arrow.size = 0.6
arrow.width = 2.0
edge.multi = 0.02
E(lr.plot)$width <- as.numeric(all.weights)*edge.multi
layout = "layout_in_circle"
l <- do.call(layout, list(lr.plot))
par(mar=c(0,0,0,0)+.1)
plot(lr.plot, edge.curved = 0.2, vertex.color=vertex.colors,
layout=l, vertex.label.cex=1.1, edge.arrow.size=arrow.size, edge.arrow.width=arrow.width, vertex.size=18,
vertex.label.font=2, vertex.label.color="black", edge.color = edge.colours)
p.labels <- c("M1M\u03A6", "EC1", "F-SL", "F-Act", "F-SH", "BC", "M2M\u03A6", "M1Mo", "MYO",
"EC3", "DC", "EC2", "TC1-Cd8", "TC2-Cd4", "MAC-TR", "Mural", "Cyc",
"MAC6", "MAC-IFNIC", "MAC7", "MAC8", "F-WntX", "NKC", "Glial")
col.set <- c("#c10023", "#008e17", "#fb8500", "#f60000", "#fde800", "#bc9000","#4ffc00", "#00bcac", "#0099cc",
"#D35400", "#00eefd", "#5f777f", "#cf6bd6", "#99cc00", "#aa00ff", "#ff00ff", "#0053c8",
"#f2a287","#ffb3ff", "#800000", "#77a7b7", "#00896e", "#00cc99", "#FE0092")
edge.sources = factor(all.edges$Source, levels = p.labels)
edge.targets = factor(all.edges$Target, levels = p.labels)
source.counts = table(edge.sources)
target.counts = table(edge.targets)
outgoing.weights = c()
outgoing.weights = c()
incoming.weights = c()
for (this.pop in p.labels) {
outgoing = sum(sig.paths[which(sig.paths$Source_population == this.pop), ]$Sum_path)
outgoing.weights = append(outgoing.weights, outgoing)
incoming = sum(sig.paths[which(sig.paths$Target_population == this.pop), ]$Sum_path)
incoming.weights = append(incoming.weights, incoming)
}
ggData = data.frame(Population = factor(p.labels, levels = p.labels), Outbound = as.numeric(source.counts),
WeightsOut = outgoing.weights, Inbound = as.numeric(target.counts), WeightsIn = incoming.weights)
ggData$Population = factor(ggData$Population, levels = p.labels)
ggplot(ggData, aes(x = WeightsIn, y = WeightsOut, colour = Population)) + geom_point(size = 5) +
scale_colour_manual(values = col.set) + theme_bw(base_size = 15) + ylab("Total outgoing weights") +
xlab("Total incoming weights") + geom_text(data=subset(ggData, (WeightsOut > 1000 | WeightsIn > 1000)),
aes(x=WeightsIn, y = WeightsOut, label=Population), hjust=0.4, vjust=-0.6, colour = "black") +
theme(legend.position = c(0.7, 0.75)) + guides(color = guide_legend(override.aes = list(size=4), ncol = 3))
